Close

% Conference paper: CV-C3D, published in the SIBGRAPI 2019 proceedings.
% Maintenance notes:
%  - The DOI is stored once, bare, in the doi field; styles build the resolver
%    link from it, so no separate DOI url field is kept.
%  - The single url field points at the repository copy identified by ibi
%    (presumably the URL that urlaccessdate refers to; confirm if it matters).
%  - booktitle is derived from the conference name and edition number given in
%    the organization field.
%  - Braces in the title protect acronyms from style-driven sentence casing.
@InProceedings{SantosSebeAlme:2019:AcReCo,
               author = "Santos, Samuel Felipe dos and Sebe, Nicu and Almeida, Jurandy",
          affiliation = "Universidade Federal de S{\~a}o Paulo - UNIFESP, Brazil and
                         University of Trento - UniTn, Italy and Universidade Federal de
                         S{\~a}o Paulo - UNIFESP, Brazil",
                title = "{CV-C3D}: Action Recognition on Compressed Videos with
                         Convolutional {3D} Networks",
            booktitle = "Proceedings of the 32nd Conference on Graphics, Patterns and
                         Images ({SIBGRAPI})",
                 year = "2019",
               editor = "Oliveira, Luciano Rebou{\c{c}}as de and Sarder, Pinaki and Lage,
                         Marcos and Sadlo, Filip",
         organization = "Conference on Graphics, Patterns and Images, 32. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "computer vision, action recognition, deep learning, compressed
                         domain, efficiency",
             abstract = "Action recognition in videos has gained substantial attention from
                         the computer vision community due to the wide range of possible
                         applications. Recent works have addressed this problem with deep
                         learning methods. The main limitation of existing approaches is
                         their difficulty to learn temporal dynamics due to the high
                         computational load demanded for processing huge amounts of data
                         required to train a model. To overcome this problem, we propose a
                         Compressed Video Convolutional 3D network (CV-C3D). It exploits
                         information from the compressed representation of a video in order
                         to avoid the high computational cost for fully decoding the video
                         stream. The speed up of the computation enables our network to use
                         3D convolutions for capturing the temporal context efficiently.
                         Our network has the lowest computational complexity among all the
                         compared approaches. Results of our approach in the task of action
                         recognition on two public benchmarks, UCF-101 and HMDB-51, were
                         comparable to the baselines, with the advantage of running at
                         faster inference speed.",
  conference-location = "Rio de Janeiro, RJ, Brazil",
      conference-year = "28-31 Oct. 2019",
                  doi = "10.1109/SIBGRAPI.2019.00012",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/3U2KG6S",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/3U2KG6S",
           targetfile = "118paper.pdf",
        urlaccessdate = "2024, Apr. 28"
}


Close